import os
import urllib.parse
import urllib.request as ur

import lxml.etree as le
import user_agent

# Interactive crawl parameters: the search term, then the first and last
# result-page numbers (both are converted to int; the crawl loop treats the
# range as inclusive).
keyword = input('请输入关键词:')
pn_start, pn_end = int(input('起始页:')), int(input('终止页:'))

def getRequest(url):
    """Build a ``urllib.request.Request`` for *url* with the crawler's headers.

    The request carries a User-Agent obtained from
    ``user_agent.get_user_agent_pc()`` (called once per request) and a fixed
    Cookie header. Nothing is sent here; the caller opens the returned object.
    """
    # NOTE(review): this cookie embeds account/session values (UserName,
    # UserToken, ...) directly in source; consider loading it from
    # configuration instead of committing it.
    cookie = 'uuid_tt_dd=10_35467775060-1557741868717-953969; __yadk_uid=0L1DqTWians3J6Jr1tAfbh68RGVinLsq; _ga=GA1.2.1407227075.1561511302; smidV2=201907071353376979640977d43c1f3752e4644c69389900409c894d62cdbf0; dc_session_id=10_1563844380231.377585; Hm_ct_e5ef47b9f471504959267fd614d579cd=5744*1*myk082610!6525*1*10_35467775060-1557741868717-953969; Hm_lvt_7a55576465fd255135675045604b87b1=1569153247; Hm_ct_7a55576465fd255135675045604b87b1=5744*1*myk082610!6525*1*10_35467775060-1557741868717-953969; Hm_lvt_e5ef47b9f471504959267fd614d579cd=1569217022,1569289422,1569290682,1571031839; acw_tc=2760822715775305660062421ed8df788dbc24e9d37552313d610f61281eb2; UserName=myk082610; UserInfo=ccdf4204c6964a06a14d2cf7383eec96; UserToken=ccdf4204c6964a06a14d2cf7383eec96; UserNick=myk082610; AU=1F3; UN=myk082610; BT=1577669274756; p_uid=U100000; Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=6525*1*10_35467775060-1557741868717-953969!5744*1*myk082610!1788*1*PC_VC; TY_SESSION_ID=e4309f5b-c08b-4a50-a7a7-93cc59d1062e; searchHistoryArray=%255B%2522Python%25E8%25BF%259B%25E9%2598%25B6%25E4%25B9%258B%25E8%25B7%25AF%2522%252C%2522mac--maridb%2522%252C%2522Python%25E5%2590%258E%25E5%258F%25B0%25E5%25BC%2580%25E5%258F%2591%2522%255D; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1578295016,1578295585,1578381332,1578487863; announcement=%257B%2522isLogin%2522%253Atrue%252C%2522announcementUrl%2522%253A%2522https%253A%252F%252Fblog.csdn.net%252Fblogdevteam%252Farticle%252Fdetails%252F103603408%2522%252C%2522announcementCount%2522%253A0%252C%2522announcementExpire%2522%253A3600000%257D; firstDie=1; dc_tos=q3tw8h; Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1578554226'
    request_headers = {
        'User-Agent': user_agent.get_user_agent_pc(),
        'Cookie': cookie,
    }
    return ur.Request(url, headers=request_headers)

def getProxyOpener():
    """Return a urllib opener that routes traffic through a freshly fetched proxy.

    Every call performs one HTTP request to the data5u dynamic-proxy API and
    builds a new opener around the address it returns, so successive calls
    may use different proxies.

    Returns:
        An ``OpenerDirector`` whose ``ProxyHandler`` sends both ``http`` and
        ``https`` requests through the fetched proxy address.

    Raises:
        urllib.error.URLError: if the proxy API cannot be reached.
    """
    api_url = 'http://api.ip.data5u.com/dynamic/get.html?order=d314e5e5e19b0dfd19762f98308114ba&sep=4'
    # Close the API response deterministically instead of leaking the socket.
    with ur.urlopen(api_url) as api_response:
        # presumably a single "host:port" line; verify against the API
        proxy_address = api_response.read().decode('utf-8').strip()
    proxy_handler = ur.ProxyHandler(
        {
            'http': proxy_address,
            # ProxyHandler is keyed by URL scheme. The crawler only opens
            # https:// URLs, so without this entry the proxy is never used.
            'https': proxy_address,
        }
    )
    return ur.build_opener(proxy_handler)


# Crawl every requested search-result page and save each linked blog post
# as blog/<title>.html.

# The output directory must exist before any page can be written.
os.makedirs('blog', exist_ok=True)

# Characters that are path separators or are rejected in file names on
# common platforms; each is replaced by "_" when a title becomes a file name.
_unsafe_filename_chars = str.maketrans({c: '_' for c in '\\/:*?"<>|'})

for pn in range(pn_start, pn_end + 1):
    # Percent-encode the keyword: a raw non-ASCII keyword or one containing
    # spaces/&/# would produce an invalid or misparsed URL.
    request = getRequest(
        'https://so.csdn.net/so/search/s.do?p=%s&q=%s&t=blog&domain=&o=&s=&u=&l=&f=&rbg=0'
        % (pn, urllib.parse.quote(keyword, safe=''))
    )
    try:
        with getProxyOpener().open(request) as search_page:
            response = search_page.read()
        print(response)
        # First link inside each result entry is the blog post URL.
        href_s = le.HTML(response).xpath('//div[@class="limit_width"]/a[position()=1]/@href')
        print(href_s)
    except Exception as e:
        # Best effort: report the failure and move on to the next page
        # (KeyboardInterrupt is deliberately not caught, so Ctrl-C works).
        print(e)
        continue

    for href in href_s:
        try:
            with getProxyOpener().open(getRequest(href)) as blog_page:
                response_blog = blog_page.read()
            # IndexError here (page without the expected <h1>) is reported
            # by the handler below and the post is skipped.
            title = le.HTML(response_blog).xpath('//h1[@class="title-article"]/text()')[0]
            print(title)
            safe_title = title.strip().translate(_unsafe_filename_chars)
            with open('blog/%s.html' % safe_title, 'wb') as f:
                f.write(response_blog)
        except Exception as e:
            print(e)



